# Load necessary libraries

# Prepare dataset
# Trait data (columns 2:9)
trait.data <- new.d

# Damage data (columns 2:3)
mean_damage_df.1 <- mean_damage_df.1[,-1]
damage_subset <- mean_damage_df.1

# Ensure response column is explicitly named
colnames(damage_subset)[1] <- "tip.label"  # Rename first column to 'response'
colnames(damage_subset)[2] <- "response"  # Rename first column to 'response'

# # Combine trait data and damage data into one dataset
# data <- cbind(damage_subset, trait.data)  # Ensure alignment of rows

# If there's an ID column, use merge instead:
data <- merge(damage_subset, trait.data, by = "tip.label")
data$tip.label <- as.factor(data$tip.label)

# Remove rows with NA values in either the response or predictors
data <- data %>% na.omit()
trait.data <- data[,c(14:21)]
# Generate all combinations of 1,2, and 3 traits
trait_combinations <- lapply(1:3, function(n) {
  combn(colnames(trait.data), n, simplify = FALSE)
}) %>%
  unlist(recursive = FALSE)  # Flatten the list of combinations

# Fit GLMs and store formulas and AIC values
results <- lapply(trait_combinations, function(predictor_set) {
  # Construct formula
  formula <- as.formula(paste("response ~", paste(predictor_set, collapse = " + ")))
  
  # Try fitting the model, handle errors gracefully
  tryCatch({
    model <- glm(formula, data = data,family = gaussian)  # Adjust family if necessary
    list(formula = formula, aic = AIC(model))  # Return formula and AIC
  }, error = function(e) {
    list(formula = formula, aic = NA)  # Return NA for failed models
  })
})

# Remove models with NA AICs
results <- results[!sapply(results, function(x) is.na(x$aic))]

# Extract AIC values and formulas into a data frame
results_df <- do.call(rbind, lapply(results, function(res) {
  data.frame(formula = deparse(res$formula), aic = res$aic)
}))

# Sort by AIC
results_df <- results_df %>% arrange(aic)

# Print the best model
cat("Best Model:\n")
print(results_df[1, ])

# Optional: Show all models sorted by AIC
cat("\nAll Models Sorted by AIC:\n")
print(results_df)

results_df <- results_df[-1,]
results_df <- results_df[-1,]
results_df <- results_df[-1,]

# Calculate ΔAIC
delta_aic <- results_df$aic - min(results_df$aic)
delta_aic

# Calculate Akaike weights
akaike_weights <- exp(-0.5 * delta_aic) / sum(exp(-0.5 * delta_aic))
akaike_weights

# Summary table
aic_table <- data.frame(
  Model = results_df$formula,
  AIC = results_df$aic,
  Delta_AIC = delta_aic,
  Akaike_Weight = akaike_weights
)

# Sort by AIC
aic_table <- aic_table[order(aic_table$AIC), ]
print(aic_table)

# Plot observed vs. predicted
# Best fit model:
model <- glm(response ~ P50 + moe + osmoticpot, data = data,family=gaussian)
vif(model)
# Too high
summary(model)

# Next best fit (if necessary)
model1 <- glm(response ~ P50 + tlp + lma, data = data,family=gaussian)
vif(model1)
# Too high
summary(model1)

# Next best fit (if necessary)
model3 <- glm(response ~ P50 + tlp + osmoticpot, data = data,family=gaussian)
vif(model3)
# Too high

# Next best fit (if necessary)
model4 <- glm(response ~ P50 + osmoticpot + hv, data = data,family=gaussian)
vif(model4)
summary(model4)

# Close...
# Calculate McFadden's R^2
1 - (model4$deviance / model4$null.deviance)

# # Next best fit (if necessary)
# model5 <- glm(response ~ P50 + osmoticpot, data = data,family=gaussian)
# vif(model5)
# summary(model5)
# 
# # Next best fit (if necessary)
# model6 <- glm(response ~ P50 + tlp + moe, data = data,family=gaussian)
# vif(model6)
# 
# # Next best fit (if necessary)
# model7 <- glm(response ~ P50 + moe + hv, data = data,family=gaussian)
# vif(model7)
# # less than 5.
# # Calculate McFadden's R^2
# 1 - (model7$deviance / model7$null.deviance)
# summary(model7)
# # Add predicted values to the dataset
# data$predicted <- predict(model7, type = "response")  # Predicted values
# 
# model7$residuals
# summary(model7)

# Check for collinearity
cor(data[, c("P50","osmoticpot", "hv")])

# Check for NAs
summary(data)

apply(data[, c("P50","osmoticpot","hv")], 2, sd)

# Check the residuals of the data
plot(model4$residuals)


plot(model4, which = 1)  # Residuals vs. Fitted plot
plot(model4, which = 2) # Q - Q plot

# Generate predicted values
data$predicted <- predict(model4, type = "response")

# Plot predicted vs observed
ggplot(data, aes(x = predicted, y = response)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  labs(x = "Predicted Values", y = "Observed Values", title = "Predicted vs. Observed Values") +
  theme_minimal()

# Residuals vs fitted values plot
data$residuals <- residuals(model4, type = "pearson")

ggplot(data, aes(x = predicted, y = residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(x = "Fitted Values", y = "Residuals", title = "Residuals vs Fitted Values") +
  theme_minimal()

# Plot the effect of P50
p50_range <- seq(min(data$P50), max(data$P50), length.out = 100)
p50_data <- data.frame(P50 = p50_range, osmoticpot = mean(data$osmoticpot), hv = mean(data$hv))
p50_data$response <- predict(model4, newdata = p50_data, type = "response")

ggplot(p50_data, aes(x = P50, y = response)) +
  geom_line(color = "blue") +
  labs(x = "P50", y = "Predicted Response", title = "Effect of P50 on Response") +
  theme_minimal()

# Plot the effect of osmoticpotential
os_range <- seq(min(data$osmoticpot), max(data$osmoticpot), length.out = 100)
os_data <- data.frame(osmoticpot = os_range, P50 = mean(data$P50), hv = mean(data$hv))
os_data$response <- predict(model4, newdata = os_data, type = "response")

ggplot(os_data, aes(x = osmoticpot, y = response)) +
  geom_line(color = "red") +
  labs(x = "os", y = "Predicted Response", title = "Effect of os on Response") +
  theme_minimal()

# Plot the effect of hv
hv_range <- seq(min(data$hv), max(data$hv), length.out = 100)
hv_data <- data.frame(hv = hv_range, P50 = mean(data$P50), osmoticpot = mean(data$osmoticpot))
hv_data$response <- predict(model4, newdata = hv_data, type = "response")

ggplot(hv_data, aes(x = hv, y = response)) +
  geom_line(color = "purple") +
  labs(x = "HV", y = "Predicted Response", title = "Effect of HV on Response") +
  theme_minimal()

# model4